import os
import re
import pypandoc
from pathlib import Path

def _stash_spans(pattern, label, text):
    """Swap every *pattern* match in *text* for a ``{{label_N}}`` placeholder.

    Returns a ``(saved, new_text)`` pair where ``saved[N]`` is the text of the
    N-th match and ``new_text`` contains the placeholders.  The pattern is
    applied with ``re.DOTALL`` so spans may run across line breaks.
    """
    saved = []

    def remember(match):
        # Store the span *before* building the placeholder.  The previous
        # ``placeholder or saved.append(...)`` form short-circuited on the
        # always-truthy string, so nothing was ever stored.
        index = len(saved)
        saved.append(match.group(0))
        return '{{' + f'{label}_{index}' + '}}'

    return saved, re.sub(pattern, remember, text, flags=re.DOTALL)


def _restore_spans(label, saved, text, transform=None):
    """Replace ``{{label_N}}`` placeholders in *text* with ``saved[N]``.

    When *transform* is given it is applied to each span before insertion.
    Placeholders whose index is outside *saved* are left untouched.
    """
    def put_back(match):
        index = int(match.group(1))
        if index >= len(saved):
            # Not a placeholder produced by _stash_spans; keep it verbatim.
            return match.group(0)
        span = saved[index]
        return transform(span) if transform else span

    placeholder = r'\{\{' + re.escape(label) + r'_([0-9]+)\}\}'
    # A callable replacement avoids backslash-escape processing, which matters
    # because the restored spans contain LaTeX backslashes.
    return re.sub(placeholder, put_back, text)


def preprocess_markdown(content: str) -> str:
    r"""Rewrite Markdown so selected LaTeX commands are emitted as code spans.

    Processing steps:

    1. Spans delimited by triple backticks are swapped for placeholders so
       later steps cannot alter them.
    2. ``$$...$$`` and ``$...$`` math spans are swapped for placeholders.
    3. Every remaining line that contains ``\mathbf``, ``\mathrm`` or
       ``\scriptstyle`` is wrapped in triple backticks.
    4. Math spans are restored.  A span containing one of the commands above
       has its leading/trailing ``$`` characters stripped and is wrapped in
       triple backticks; every other span is restored verbatim.
    5. The original code spans are restored unchanged.

    Args:
        content: Markdown source text.

    Returns:
        The rewritten Markdown text.
    """
    target_commands = ('\\mathbf', '\\mathrm', '\\scriptstyle')

    def has_target(text):
        return any(cmd in text for cmd in target_commands)

    def math_as_code(math_block):
        if not has_target(math_block):
            return math_block
        # Drop the surrounding dollar signs and present the formula as code.
        inner = math_block.strip('$')
        return f'```{inner}```'

    # Protect code spans first so that dollar signs inside code are not
    # mistaken for math delimiters.
    code_blocks, text = _stash_spans(r'```.*?```', 'CODE_BLOCK', content)
    math_blocks, text = _stash_spans(
        r'\$\$.*?\$\$|\$.*?\$', 'MATH_BLOCK', text
    )

    # Wrap the remaining lines that contain a target command.
    text = '\n'.join(
        f'```{line}```' if has_target(line) else line
        for line in text.split('\n')
    )

    # Restore math before code: a math span may itself contain a code
    # placeholder, which the second pass then resolves.
    text = _restore_spans('MATH_BLOCK', math_blocks, text, math_as_code)
    return _restore_spans('CODE_BLOCK', code_blocks, text)

def convert_md_to_pdf():
    """Convert matching Markdown files next to this script into PDFs.

    Every immediate sub-directory of the directory containing this file is
    scanned for files matching ``*md优化-0000.md``.  Each match is read as
    UTF-8, passed through ``preprocess_markdown``, written to a sibling
    ``<stem>_temp.md`` file and converted with pandoc (xelatex engine) to a
    PDF that has the same name as the source but a ``.pdf`` suffix.

    Progress and failures are reported on stdout.  A failure in any step for
    one file (reading, preprocessing, writing the temporary file, or the
    pandoc run) is reported and does not stop the remaining files.  The
    temporary file is removed afterwards whether or not conversion succeeded.
    """
    base_dir = Path(__file__).parent.resolve()
    for sub_dir in base_dir.iterdir():
        if not sub_dir.is_dir():
            continue

        print(f"\n正在处理目录: {sub_dir.name}")
        target_files = list(sub_dir.glob("*md优化-0000.md"))
        if not target_files:
            print(f"⚠️ 未找到目标文件：{sub_dir}/[...]md优化-0000.md")
            continue

        for md_file in target_files:
            pdf_path = md_file.with_suffix('.pdf')
            temp_md = md_file.with_name(md_file.stem + '_temp.md')
            try:
                # Reading and writing live inside the try block so that one
                # unreadable file cannot abort the whole batch and a partially
                # written temporary file is still cleaned up.
                original_content = md_file.read_text(encoding='utf-8')
                processed_content = preprocess_markdown(original_content)
                temp_md.write_text(processed_content, encoding='utf-8')

                # Convert the preprocessed temporary file, not the original.
                pypandoc.convert_file(
                    str(temp_md),
                    'pdf',
                    outputfile=str(pdf_path),
                    format='markdown',
                    extra_args=[
                        '--pdf-engine=xelatex',
                        '-V', 'CJKmainfont=SimSun',
                        '-V', 'CJKboldfont=SimHei',
                        '-V', 'CJKoptions=BoldFont=SimHei,ItalicFont=KaiTi',
                        '-V', 'geometry:a5paper,left=0.5in,right=0.5in,top=0.5in,bottom=0.5in',
                        # Resolve relative resources against the source
                        # file's directory.
                        '--resource-path=' + str(md_file.parent),
                        '--mathjax',
                        '-f', 'markdown+tex_math_dollars+raw_tex',
                        '-V', 'mainfontsize=22pt',
                        '--fail-if-warnings=false'
                    ]
                )
                print(f"✅ 成功生成: {pdf_path.name}")
            except Exception as e:
                # Batch boundary: report the failure and move on to the next file.
                print(f"❌ 转换失败: {md_file.name}")
                print(f"错误信息: {str(e)}")
            finally:
                # Remove the temporary file.
                if temp_md.exists():
                    temp_md.unlink()

if __name__ == "__main__":
    # Print the banner and the list of external tools the user must have
    # installed before the conversion can work.
    startup_messages = (
        "=== Markdown批量转换PDF程序 ===",
        "依赖检查: 请确认已安装以下环境:",
        "1. Pandoc 3.6+ ()",
        "2. TeX Live 2024+ (含xelatex引擎)",
        "3. 中文字体包 (需包含SimSun和SimHei)",
    )
    for message in startup_messages:
        print(message)

    convert_md_to_pdf()

    # Wait for Enter before exiting so the output can still be read.
    print("\n处理完成，按回车键退出...")
    input()